#delimit ;
/* Session setup: commands end at a semicolon from here on. Close any log   */
/* left open (errors ignored), turn off output paging, and clear memory.    */
capture log close ;
set more off ;
clear all ;

/* MASTER FILE FOR EPA WQ GRANT FINAL SP SURVEY DATA PROCESSING AND ANALYSIS */
/* AUTHOR: ROGER H. VON HAEFEN  */
/* LAST EDITED: OCTOBER 1, 2021 */


/* Open the text log named by the analysis_log global (overwriting any      */
/* existing file) and suspend it straight away. Logging is switched back on */
/* only around the output that should be recorded.                          */
log using $analysis_log, replace text ;
log off ;

/* CHOICE MODELING - DATA PREPARATION AND ANALYSIS */
/* ALSO EXPORT DATA TO GAUSS */

/* Load the imputed survey data. CLEAR is the documented option of USE for  */
/* discarding data in memory (the earlier REPLACE is not a documented       */
/* option of USE).                                                          */
use $imputed_data, clear ;

/* Attach the weights file one-to-one on CODE. Every record must match in   */
/* both files, otherwise the script stops here.                             */
merge 1:1 code using $weights_data ;
assert _merge == 3 ;
drop _merge ;

/* Respondent-level variables carried through the reshape and used as sort keys. */
local demos "access_mode survey_duration near* block_f county gender age income employ_status own_rent water_bill educ college adults kids resident native asian black white hawaii other hisp_latino phone debrief_* wgt_final" ;

/* Convert the letter suffixes _a, _b, _c, _d on every choice-experiment    */
/* stub to the numeric suffixes 1, 2, 3, 4 (e.g. cost_a becomes cost1 and   */
/* ec_g_a becomes ec_g1) so that RESHAPE LONG can stack them later.         */
foreach stub in cost ec_g ec_m ec_p hr_g hr_m hr_p md_g md_m md_p ce order {;
    rename (`stub'_a `stub'_b `stub'_c `stub'_d) (`stub'1 `stub'2 `stub'3 `stub'4) ;
};

sort resp_id `demos' ;

/* SUMMARIZING DATA */

log on ;

tabulate gender ;
tabulate county ;
summarize income, detail ;
tabulate covid19 ;

/* One-way tabulation of each debriefing item, debrief_1 through debrief_11 */
forvalues k = 1/11 {;
    tabulate debrief_`k' ;
};

/* debrief_9 for everyone, then restricted to debrief_10 in (SA, A), then to college == 1 */
tabulate debrief_9 ;
tabulate debrief_9 if inlist(debrief_10,"SA","A") ;
tabulate debrief_9 if inlist(college,1) ;


/* MOST / LEAST IMPORTANT ATTRIBUTES BROKEN DOWN BY WHETHER ATTRIBUTES WERE PRESENTED IN FORWARD/REVERSE DIRECTION */

describe block_f ;

/* MOST IMPORTANT ATTRIBUTE: full sample, then block_f == 1, then block_f == 0 */

tabulate attribute_1 ;
tabulate attribute_1 if block_f == 1 ;
tabulate attribute_1 if block_f == 0 ;

/* LEAST IMPORTANT ATTRIBUTE: full sample, then block_f == 1, then block_f == 0 */

tabulate attribute_3 ;
tabulate attribute_3 if block_f == 1 ;
tabulate attribute_3 if block_f == 0 ;


/* SERIAL NONPARTICIPATION */

/* Flag respondents who gave the same vote on all four choice occasions.    */
/* The flags use explicit equality tests instead of arithmetic products of  */
/* the votes, because a vote can carry the code -1 (tested just below and   */
/* dropped after the reshape). With products, two -1 votes plus two Yes     */
/* votes multiplied to 1 and were counted as "always yes", and a -1 vote    */
/* made (1-ce) equal 2, so the "always no" flag was not restricted to 0/1.  */
/* A respondent with any -1 vote is now never flagged. As before, the flag  */
/* is missing when any of the four votes is missing.                        */
gen serial_nonp_y = (ce1 == 1) & (ce2 == 1) & (ce3 == 1) & (ce4 == 1) if !missing(ce1, ce2, ce3, ce4) ;
label var serial_nonp_y "Serial Nonparticipant - Always Yes" ;
gen serial_nonp_n = (ce1 == 0) & (ce2 == 0) & (ce3 == 0) & (ce4 == 0) if !missing(ce1, ce2, ce3, ce4) ;
label var serial_nonp_n "Serial Nonparticipant - Always No" ;
d serial_nonp_y serial_nonp_n ;
tab serial_nonp_y serial_nonp_n ;
sum serial_nonp_y serial_nonp_n ;


/* How many respondents have at least one vote coded -1, followed by the    */
/* number of respondent rows before the reshape.                            */
gen test = (ce1 == -1) | (ce2 == -1) | (ce3 == -1) | (ce4 == -1) ;
tab test ;
drop test ;
di _N ;

/* Keep the respondent id, the demographic list, the four-occasion choice   */
/* variables and the serial-nonparticipation flags, then stack the four     */
/* occasions so that each row is one respondent-by-occasion record.         */
keep resp_id `demos'
     cost* ce* order*
     ec_g* ec_m* ec_p*
     hr_g* hr_m* hr_p*
     md_g* md_m* md_p*
     serial* ;
reshape long cost ce order
             ec_g ec_m ec_p
             hr_g hr_m hr_p
             md_g md_m md_p,
        i(resp_id) j(seq) ;
label variable seq "CE Sequence ID (not the same as order presented - see ORDER variable)" ;

/* Row count before and after removing occasions whose vote is coded -1 */
display _N ;
drop if ce == -1 ; /* CHECK - SENSITIVITY WOULD BE TO DROP ENTIRE OBSERVATION */
display _N ;

/* Distribution of the number of occasions left per respondent (one value per respondent, on the last row) */
by resp_id : generate test = _N if _n == _N ;
tabulate test ;
drop test ;
log off ;


/* Variable labels for the stacked choice-experiment variables */
label variable cost  "Action Plan Cost" ;
label variable ce    "Action Plan Vote (1 = Yes, 0 = No)" ;
label variable order "Ordering of CEs (1 = First, 4 = Last)" ;

label variable ec_g  "Ecosystem Conditions = % Good w/ Action Plan" ;
label variable ec_m  "Ecosystem Conditions = % Fair w/ Action Plan" ;
label variable ec_p  "Ecosystem Conditions = % Poor w/ Action Plan" ;

label variable hr_g  "Health Risk = % Good w/ Action Plan" ;
label variable hr_m  "Health Risk = % Fair w/ Action Plan" ;
label variable hr_p  "Health Risk = % Poor w/ Action Plan" ;

label variable md_g  "Murky Water Days = % Good w/ Action Plan" ;
label variable md_m  "Murky Water Days = % Fair w/ Action Plan" ;
label variable md_p  "Murky Water Days = % Poor w/ Action Plan" ;

/* The BY COST prefix used below needs the data ordered by cost */
sort cost ;
log on ;

/* Vote by cost level: all occasions pooled, then separately by presentation order */
tabulate ce cost ;
tabulate ce cost if order == 1 ;
tabulate ce cost if order == 2 ;
tabulate ce cost if order == 3 ;
tabulate ce cost if order == 4 ;

/* PERCENT OF PEOPLE WHO SAY YES (mean of ce within each cost level) */

by cost: summarize ce ;
/* QUESTION #1 */
by cost: summarize ce if order == 1 ;
/* QUESTION #2 */
by cost: summarize ce if order == 2 ;
/* QUESTION #3 */
by cost: summarize ce if order == 3 ;
/* QUESTION #4 */
by cost: summarize ce if order == 4 ;

log off ;

/* Quick look at block_f (shown on screen only, the log is suspended here) */
describe block_f ;
summarize block_f ;

/* FIRST CE, ATTRIBUTES IN FORWARD ORDER */

/* ACCESS MODE (=1 IF COMPUTER/TABLET) */
describe access_mode ;
tabulate access_mode ;

/* Write the long-format estimation data set, overwriting any earlier copy */
save $estimation_data, replace ;

/* Snapshot the full estimation data. Everything below works on a screened  */
/* copy, and no RESTORE appears in the visible part of this script.         */
preserve ;

log on ;

sum survey_duration, detail ;
tab debrief_3 ;
tab debrief_1 ;
tab debrief_2 ;

/* DROPPING:										*/
/*    1) SPEEDING PEOPLE (<8 MINUTES TO COMPLETE SURVEY)				*/
/*    2) PEOPLE WHO TAKE MORE THAN A WEEK TO COMPLETE THE SURVEY			*/
/*    3) PEOPLE WHO FAIL THE TRAP QUESTION (ONLY debrief_3 == "D" IS KEPT)		*/
/*    4) PEOPLE WHO DISAGREE OR STRONGLY DISAGREE WITH THE CONSEQUENTIALITY STATEMENTS  */
/* NOTE(review): this header used to say "<=8 MINUTES", but the filter keeps		*/
/* survey_duration >= 8, so a duration of exactly 8 stays in the sample. The		*/
/* comment now describes the code - confirm that the 8-minute boundary is intended.	*/

keep if survey_duration >= 8 & survey_duration <= (7*24*60) & inlist(debrief_3,"D") & !inlist(debrief_1,"D","SD") & !inlist(debrief_2,"D","SD") ;

/* The weight must be constant within respondent */
sort resp_id seq ;
by resp_id : assert wgt_final[1] == wgt_final ;

/* Cost scaled by one plus near_dist. The inverse-distance variable that    */
/* used to be generated here was never referenced afterwards and has been   */
/* removed.                                                                 */
gen cost_dis = cost/(1+near_dist) ;

/* Weighted logit of the vote, standard errors clustered on respondent */
logit ce cost          ec_g ec_p hr_g hr_p md_g md_p age gender white college [pw=wgt_final], cluster(resp_id) ;
nlcom wtp : -12*(_b[ec_g]-_b[ec_p]+_b[hr_g]-_b[hr_p]+_b[md_g]-_b[md_p])/_b[cost] ;                               /* WTP TO CLEAN UP ONE MILE OF STREAM FROM WORST TO BEST CATEGORIES */

/* Same model with the distance-scaled cost term added.                     */
/* NOTE(review): 9.584872 is hard-coded and, per the trailing comment, is   */
/* presumably the average near_dist - confirm it still matches the screened */
/* sample whenever the data or the screening rules change.                  */
logit ce cost cost_dis ec_g ec_p hr_g hr_p md_g md_p age gender white college [pw=wgt_final], cluster(resp_id) ;
nlcom wtp : -12*(_b[ec_g]-_b[ec_p]+_b[hr_g]-_b[hr_p]+_b[md_g]-_b[md_p])/(_b[cost]+_b[cost_dis]/(1+ 9.584872)) ;  /* WTP FOR PERSON LIVING AVERAGE DISTANCE FROM CENTROID CLEANED-UP STREAMS */
nlcom wtp : -12*(_b[ec_g]-_b[ec_p]+_b[hr_g]-_b[hr_p]+_b[md_g]-_b[md_p])/(_b[cost]+_b[cost_dis]/(1+ 1)) ;         /* WTP FOR PERSON LIVING ONE MILE         FROM CENTROID CLEANED-UP STREAMS */

log off ;


/* EXPORTING DATA TO TXT FILE THAT CAN BE READ INTO GAUSS */
/* Two files are written into the gauss folder: final_data.txt holds one    */
/* row per respondent and choice occasion, final_data1.txt holds one row    */
/* per respondent. Both are written without a header row and without        */
/* quotes around strings.                                                   */

/* access_mode: list missing values and set them to 1. The variable is then */
/* dropped and is not part of either export.                                */
list resp_id if access_mode == . ;
replace access_mode = 1 if access_mode == . ;  /* CHECK */

/* Number of choice occasions each respondent still has */
sort resp_id seq ;
by resp_id : gen ce_cnt = _N ;  label var ce_cnt "# of choice occasions" ;

/* nonpart = 1 when a respondent has all four occasions and voted No on     */
/* every one. TOTAL() is the current name of the EGEN function formerly     */
/* called SUM().                                                            */
egen nonpart = total(ce), by(resp_id) ;
replace nonpart = (nonpart==0)*(ce_cnt==4) ;  label var nonpart "= 1 if serial nonparticipant" ;
drop access_mode ;

/* Column order and content of the occasion-level export */
order resp_id seq ce_cnt nonpart ;
keep
    resp_id seq ce_cnt nonpart
    block_f wgt_final near_dist
    gender age income employ_status own_rent water_bill
    educ college adults kids resident
    native asian black white other hisp_latino
    phone
    cost ce order
    ec_g ec_m ec_p
    hr_g hr_m hr_p
    md_g md_m md_p ;

log on ;
d ;
sum ;
log off ;

/* Forward slashes in the path are accepted by Stata on every platform,     */
/* while a backslash separator works on Windows only.                       */
outsheet using gauss/final_data.txt, nonames noquote replace ;

/* Respondent-level file: first row of each respondent */
keep resp_id ce_cnt wgt_final nonpart ;
by resp_id : keep if _n == 1 ;

outsheet using gauss/final_data1.txt, nonames noquote replace ;

log on ;
d ;
sum ;
log off ;





